Load Data

# File generated in /perturbation_16s/analysis/analysis_summer2019/generate_phyloseq.rmd
psSubj <- readRDS("../../data/16S/phyloseq/perturb_physeq_participants_decontam_15Jul19.rds")
psSubj
phyloseq-class experiment-level object
otu_table()   OTU Table:         [ 2425 taxa and 4402 samples ]
sample_data() Sample Data:       [ 4402 samples by 40 sample variables ]
tax_table()   Taxonomy Table:    [ 2425 taxa by 12 taxonomic ranks ]
phy_tree()    Phylogenetic Tree: [ 2425 tips and 2424 internal nodes ]
# otu_table()   OTU Table:         [ 2425 taxa and 4402 samples ]
# sample_data() Sample Data:       [ 4402 samples by 40 sample variables ]
SMP <- data.frame(sample_data(psSubj))
SUBJ <- SMP %>% select(Subject, Age:BirthYear) %>% distinct()
load("output/pairwise_dist_to_baseline_subj_16S.rda")
load("output/fpca_res.rda")
ls()
 [1] "abx_intv_cols"         "bray_to_baseline"      "bray_to_baseline_fltr" "bray.df"               "brayD.ihs"            
 [6] "cc_intv_cols"          "curdir"                "datadir"               "diet_intv_cols"        "fpca.bray.abx"        
[11] "fpca.bray.cc"          "fpca.bray.diet"        "fpca.bray.diet30"      "intv_cols"             "jacc_to_baseline"     
[16] "jacc.df"               "jaccardD"              "pAbx"                  "pAbxLab"               "pBray"                
[21] "pDiet"                 "pDietLab"              "psSubj"                "SMP"                   "SUBJ"                 
[26] "theme_subplot"         "uniFrac_to_baseline"   "uniFrac.df"            "uniFracD.lst"         

Functional PCA

fit_fdapca <- function(
  df, time_column, value_column, replicate_column,
  feat_column = NULL, feat = NULL,
  cluster = FALSE, cmethod = "EMCluster", K = 2, filter = TRUE,
  thresh = 0, num_nonzero = 10, clust_min_num_replicate = 10,
  fpca_optns = NULL, fclust_optns = NULL)
{
  if(!is.null(feat) & !is.null(feat_column)) {
    df <- df %>% 
      filter_at(.vars = feat_column, any_vars((.) == feat))
  }
  y_lst <- plyr::dlply(df, replicate_column, function(x) x[[value_column]])
  t_lst <- plyr::dlply(df, replicate_column, function(x) x[[time_column]])
  if (filter){
    idx <- sapply(y_lst, function(y){sum(abs(y) > thresh) >= num_nonzero})
    y_lst <- y_lst[idx]
    t_lst <- t_lst[idx]
  }  
  if(length(y_lst) == 0) return(NULL)
  feat_res <- NULL
  if(cluster & (length(y_lst) >= clust_min_num_replicate)) {
    feat_res <- try(fdapace::FClust(
      y_lst, t_lst, k = K, optnsFPCA = fpca_optns, optnsCS = fclust_optns))
  } else {
    if(is.null(fpca_optns)) {
      fpca_optns <- list()
    } 
    if (length(y_lst) < clust_min_num_replicate) {
      fpca_optns$dataType <- 'Sparse'
    }
    feat_res <- fdapace::FPCA(y_lst, t_lst, optns = fpca_optns)
    if(cluster){
      feat_res <- list("cluster" = rep(1, length(y_lst)), 
                       "fpca" = feat_res, "clusterObj" = NULL)
    }
  }
  feat_res
}
fitted_values_fpca <- function(obj, derOptns = list(p = 0)) 
{
  selectedK <- NULL; clust_df <- NULL
  if("cluster" %in% names(obj)){
    clust_df <- data.frame(
      "Replicate_ID" = names(obj$fpca$inputData$Ly),
      "Cluster" =  as.character(obj[["cluster"]]),
      stringsAsFactors = FALSE)
    obj <- obj[["fpca"]]
  }
  if(derOptns$p > 0) {
    selectedK <- fdapace::SelectK(obj)$K
    if(!is.finite(selectedK)) selectedK <- ncol(obj$xiEst)
  }  
  fit <- fdapace:::fitted.FPCA(obj, K = selectedK, derOptns = derOptns)
  rownames(fit) <- names(obj$inputData$Ly)
  colnames(fit) <- obj$workGrid
  fit <- reshape2::melt(fit, varnames = c("Replicate_ID", "time")) %>%
    mutate(Replicate_ID = as.character(Replicate_ID))
  if(!is.null(clust_df)){
    fit <- suppressMessages(fit  %>% left_join(clust_df))
  }
  return(fit)
}
fit_dist_to_baseline <- function(
  dist_to_baseline, time_column="RelDay", 
  value_column = "dist_to_baseline", replicate_column = "Subject") 
{
  nGrid <- length(
    seq(min(dist_to_baseline[[time_column]]),
        max(dist_to_baseline[[time_column]])))
  fpca_res <- fit_fdapca(
    dist_to_baseline, time_column, value_column, replicate_column,
    cluster = FALSE, filter = FALSE, fpca_optns = list(nRegGrid = nGrid))
  
  fitted_mean <- data.frame(time = fpca_res$workGrid, value = fpca_res$mu)
  fitted_response <- fitted_values_fpca(fpca_res) %>%
    mutate(Subject = Replicate_ID) %>%
    left_join(
      fitted_values_fpca(fpca_res, derOptns = list(p = 1)) %>%
        mutate(Subject = Replicate_ID) %>%
        rename("deriv" = value)
    )
  return(list(res = fpca_res, mean = fitted_mean, fitted = fitted_response))
}
# this is because some subjects have multiple samples take the same day
bray_to_baseline_fltr <- bray_to_baseline %>% 
  group_by(perturbation, Subject, Group, Interval, RelDay) %>%
  summarise(n = n(), dist_to_baseline = mean(dist_to_baseline)) %>%
  ungroup()
bray_to_baseline_fltr %>% filter(n > 1)

Antibiotics

bray_to_baseline_fltr %>% 
  filter(perturbation == "Abx") %>%
  filter(RelDay >= -50, RelDay <= 60) %>%
  mutate(Interval = factor(Interval, level = names(abx_intv_cols))) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Bray-Curtis distance to 7 pre-antibiotic samples") 

fpca.bray.abx <- fit_dist_to_baseline(
  bray_to_baseline_fltr %>% filter(perturbation == "Abx", RelDay >= -50, RelDay <= 60))

save(list = c("fpca.bray.abx"), file = "output/fpca_res.rda")
(pAbx <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Abx", RelDay >= -50, RelDay <= 60) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.abx[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7, color = "grey30") +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_point(aes(color = Interval), size = 1.5, alpha = 0.7) +
  geom_line(
    data = fpca.bray.abx[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = abx_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

fpca.bray.abx[["fitted"]] <- fpca.bray.abx[["fitted"]] %>%
  mutate(
    Interval = ifelse(fpca.bray.abx[["fitted"]]$time < 0 , "PreAbx",
               ifelse(fpca.bray.abx[["fitted"]]$time >= 0 & fpca.bray.abx[["fitted"]]$time <= 4, "MidAbx",
                      "PostAbx")))
(pAbxDeriv <-  fpca.bray.abx[["fitted"]] %>%
  ggplot(aes(x = RelDay)) +
  geom_line(
    aes(group = Subject, x = time, y = deriv, color = Interval),
    alpha = 0.5, size = 0.7) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  scale_color_manual(values = abx_intv_cols, name = "Interval") +
  scale_x_continuous(name = "",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  ylab("Derivative"))

(pAbxLab <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Abx", RelDay >= -50, RelDay <= 60) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.abx[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7 ) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_text(
      aes(label = Subject, color = Interval), size = 4, alpha = 0.7) +
  geom_line(
    data = fpca.bray.abx[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = abx_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

  # geom_point(
  #     data = abx_bray %>% filter(RelDay > StabTime),
  #     color = "orange", size = 2.5) +
vp = grid::viewport(width = 0.3, height = 0.32, x = 0.07, y =0.95, just = c("left", "top"))
print(pAbx)
print(pAbxDeriv + theme_bw(base_size = 10) + theme_subplot, vp = vp)

Diet

bray_to_baseline_fltr %>% 
  filter(perturbation == "Diet", RelDay >= -30, RelDay <= 30) %>%
  mutate(Interval = factor(Interval, level = names(diet_intv_cols))) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") 

fpca.bray.diet30 <- fit_dist_to_baseline(
  bray_to_baseline_fltr %>% filter(perturbation == "Diet", RelDay >= -30, RelDay <= 30))

save(list = c("fpca.bray.abx", "fpca.bray.diet", "fpca.bray.diet30"), file = "output/fpca_res.rda")
(pDiet <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Diet", RelDay >= -30, RelDay <= 30) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.diet30[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7, color = "grey30") +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_point(aes(color = Interval), size = 1.5, alpha = 0.7) +
  geom_line(
    data = fpca.bray.diet30[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = diet_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(NA, NA), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20))) 

fpca.bray.diet30[["fitted"]] <- fpca.bray.diet30[["fitted"]] %>%
  mutate(
    Interval = ifelse(fpca.bray.diet30[["fitted"]]$time < 0 , "PreDiet",
               ifelse(fpca.bray.diet30[["fitted"]]$time >= 0 & fpca.bray.diet30[["fitted"]]$time <= 4, "MidDiet",
                      "PostDiet")))
(pDietDeriv <-  fpca.bray.diet30[["fitted"]] %>%
  ggplot(aes(x = RelDay)) +
  geom_line(
    aes(group = Subject, x = time, y = deriv, color = Interval),
    alpha = 0.5, size = 0.7) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  scale_color_manual(values = diet_intv_cols, name = "Interval") +
  scale_x_continuous(name = "",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  ylab("Derivative"))

(pDietLab <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Diet", RelDay >= -50, RelDay <= 60) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.diet[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7 ) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_text(
      aes(label = Subject, color = Interval), size = 4, alpha = 0.7) +
  geom_line(
    data = fpca.bray.diet[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = diet_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

vp = grid::viewport(width = 0.3, height = 0.32, x = 0.07, y =0.95, just = c("left", "top"))
print(pDiet)
print(pDietDeriv + theme_bw(base_size = 10) + theme_subplot, vp = vp)

Colon cleanout

bray_to_baseline_fltr %>% 
  filter(perturbation == "CC", RelDay >= -50, RelDay <= 50) %>%
  mutate(Interval = factor(Interval, level = names(cc_intv_cols))) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") 

fpca.bray.cc30 <- fit_dist_to_baseline(
  bray_to_baseline_fltr %>% 
    filter(perturbation == "CC", RelDay >= -30, RelDay <= 30) %>%
    arrange(Subject, RelDay))
Joining, by = c("Replicate_ID", "time", "Subject")
save(list = c("fpca.bray.abx", "fpca.bray.diet","fpca.bray.diet30", 
              "fpca.bray.cc", "fpca.bray.cc30"), 
     file = "output/fpca_res.rda")
(pCC <- bray_to_baseline_fltr %>% 
  filter(perturbation == "CC", RelDay >= -30, RelDay <= 30) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.cc30[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7, color = "grey30") +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_point(aes(color = Interval), size = 1.5, alpha = 0.7) +
  geom_line(
    data = fpca.bray.cc30[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = cc_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(NA, NA), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20))) 

fpca.bray.cc30[["fitted"]] <- fpca.bray.cc30[["fitted"]] %>%
  mutate(Interval = ifelse(fpca.bray.cc30[["fitted"]]$time < 0 , "PreCC","PostCC"))
(pCCDeriv <-  fpca.bray.cc30[["fitted"]] %>%
  ggplot(aes(x = RelDay)) +
  geom_line(
    aes(group = Subject, x = time, y = deriv, color = Interval),
    alpha = 0.5, size = 0.7) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  scale_color_manual(values = cc_intv_cols, name = "Interval") +
  scale_x_continuous(name = "",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  ylab("Derivative"))

(pCCLab <- bray_to_baseline_fltr %>% 
  filter(perturbation == "CC", RelDay >= -50, RelDay <= 50) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.cc[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7 ) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_text(
      aes(label = Subject, color = Interval), size = 4, alpha = 0.7) +
  geom_line(
    data = fpca.bray.cc[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = cc_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

vp = grid::viewport(width = 0.3, height = 0.32, x = 0.07, y =0.95, just = c("left", "top"))
print(pCC)
print(pCCDeriv + theme_bw(base_size = 10) + theme_subplot, vp = vp)

sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] fdapace_0.4.1        forcats_0.4.0        stringr_1.4.0        dplyr_0.8.3          purrr_0.3.2          readr_1.3.1         
 [7] tidyr_0.8.3          tibble_2.1.3         ggplot2_3.2.0        tidyverse_1.2.1.9000 RColorBrewer_1.1-2   phyloseq_1.26.1     

loaded via a namespace (and not attached):
 [1] nlme_3.1-140        fs_1.3.1            lubridate_1.7.4     httr_1.4.0          numDeriv_2016.8-1.1 tools_3.5.1        
 [7] backports_1.1.4     utf8_1.1.4          R6_2.4.0            vegan_2.5-5         rpart_4.1-15        Hmisc_4.2-0        
[13] DBI_1.0.0           lazyeval_0.2.2      BiocGenerics_0.28.0 mgcv_1.8-28         colorspace_1.4-1    permute_0.9-5      
[19] ade4_1.7-13         nnet_7.3-12         withr_2.1.2         tidyselect_0.2.5    gridExtra_2.3       compiler_3.5.1     
[25] cli_1.1.0           rvest_0.3.4         Biobase_2.42.0      htmlTable_1.13.1    xml2_1.2.0          labeling_0.3       
[31] scales_1.0.0        checkmate_1.9.4     digest_0.6.20       foreign_0.8-71      rmarkdown_1.14      XVector_0.22.0     
[37] htmltools_0.3.6     base64enc_0.1-3     pkgconfig_2.0.2     dbplyr_1.4.2        htmlwidgets_1.3     rlang_0.4.0        
[43] readxl_1.3.1        rstudioapi_0.10     generics_0.0.2      jsonlite_1.6        acepack_1.4.1       magrittr_1.5       
[49] Formula_1.2-3       biomformat_1.10.1   Matrix_1.2-17       fansi_0.4.0         Rcpp_1.0.1          munsell_0.5.0      
[55] S4Vectors_0.20.1    Rhdf5lib_1.4.3      ape_5.3             stringi_1.4.3       yaml_2.2.0          MASS_7.3-51.4      
[61] zlibbioc_1.28.0     rhdf5_2.26.2        plyr_1.8.4          grid_3.5.1          parallel_3.5.1      crayon_1.3.4       
[67] lattice_0.20-38     Biostrings_2.50.2   haven_2.1.1         splines_3.5.1       multtest_2.38.0     hms_0.5.0          
[73] zeallot_0.1.0       knitr_1.23          pillar_1.4.2        igraph_1.2.4.1      reshape2_1.4.3      codetools_0.2-16   
[79] stats4_3.5.1        reprex_0.3.0        glue_1.3.1          evaluate_0.14       latticeExtra_0.6-28 data.table_1.12.2  
[85] modelr_0.1.4        vctrs_0.2.0         foreach_1.4.4       cellranger_1.1.0    gtable_0.3.0        assertthat_0.2.1   
[91] xfun_0.8            broom_0.5.2         pracma_2.2.5        survival_2.44-1.1   iterators_1.0.10    IRanges_2.16.0     
[97] cluster_2.1.0      
---
title: "Functional PCA"
output: html_notebook
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library("phyloseq")
library("RColorBrewer")
library("tidyverse")
library("fdapace")

datadir <- "../../data/"
curdir <- getwd()
theme_set(theme_bw())
theme_update(text = element_text(20))

theme_subplot <- theme(
      legend.position = "none",
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      panel.background = element_rect(fill = "transparent",colour = NA),
      plot.background = element_rect(fill = "transparent",colour = NA)
    )

abx_intv_cols <- c("PreAbx" = "grey60", "MidAbx" = "#E41A1C", 
                   "PostAbx" = "#00BFC4", "UnpAbx" = "Purple")
diet_intv_cols <- c("PreDiet" = "grey60", "MidDiet" = "#FD8D3C", 
                    "PostDiet" = "#4DAF4A")
cc_intv_cols <- c("PreCC" = "grey60", "PostCC" = "#7A0177") #"#AE017E")

intv_cols <- c(abx_intv_cols, diet_intv_cols, cc_intv_cols, "NoInterv" = "grey60")
```



# Load Data

```{r}
# File generated in /perturbation_16s/analysis/analysis_summer2019/generate_phyloseq.rmd
psSubj <- readRDS("../../data/16S/phyloseq/perturb_physeq_participants_decontam_15Jul19.rds")
psSubj
# otu_table()   OTU Table:         [ 2425 taxa and 4402 samples ]
# sample_data() Sample Data:       [ 4402 samples by 40 sample variables ]
SMP <- data.frame(sample_data(psSubj))
SUBJ <- SMP %>% select(Subject, Age:BirthYear) %>% distinct()
```


```{r}
load("output/pairwise_dist_to_baseline_subj_16S.rda")
load("output/fpca_res.rda")
ls()
```


# Functional PCA 


```{r}
fit_fdapca <- function(
  df, time_column, value_column, replicate_column,
  feat_column = NULL, feat = NULL,
  cluster = FALSE, cmethod = "EMCluster", K = 2, filter = TRUE,
  thresh = 0, num_nonzero = 10, clust_min_num_replicate = 10,
  fpca_optns = NULL, fclust_optns = NULL)
{
  if(!is.null(feat) & !is.null(feat_column)) {
    df <- df %>% 
      filter_at(.vars = feat_column, any_vars((.) == feat))
  }
  y_lst <- plyr::dlply(df, replicate_column, function(x) x[[value_column]])
  t_lst <- plyr::dlply(df, replicate_column, function(x) x[[time_column]])
  if (filter){
    idx <- sapply(y_lst, function(y){sum(abs(y) > thresh) >= num_nonzero})
    y_lst <- y_lst[idx]
    t_lst <- t_lst[idx]
  }  
  if(length(y_lst) == 0) return(NULL)
  feat_res <- NULL
  if(cluster & (length(y_lst) >= clust_min_num_replicate)) {
    feat_res <- try(fdapace::FClust(
      y_lst, t_lst, k = K, optnsFPCA = fpca_optns, optnsCS = fclust_optns))
  } else {
    if(is.null(fpca_optns)) {
      fpca_optns <- list()
    } 
    if (length(y_lst) < clust_min_num_replicate) {
      fpca_optns$dataType <- 'Sparse'
    }
    feat_res <- fdapace::FPCA(y_lst, t_lst, optns = fpca_optns)
    if(cluster){
      feat_res <- list("cluster" = rep(1, length(y_lst)), 
                       "fpca" = feat_res, "clusterObj" = NULL)
    }
  }
  feat_res
}

fitted_values_fpca <- function(obj, derOptns = list(p = 0)) 
{
  selectedK <- NULL; clust_df <- NULL
  if("cluster" %in% names(obj)){
    clust_df <- data.frame(
      "Replicate_ID" = names(obj$fpca$inputData$Ly),
      "Cluster" =  as.character(obj[["cluster"]]),
      stringsAsFactors = FALSE)
    obj <- obj[["fpca"]]
  }
  if(derOptns$p > 0) {
    selectedK <- fdapace::SelectK(obj)$K
    if(!is.finite(selectedK)) selectedK <- ncol(obj$xiEst)
  }  
  fit <- fdapace:::fitted.FPCA(obj, K = selectedK, derOptns = derOptns)
  rownames(fit) <- names(obj$inputData$Ly)
  colnames(fit) <- obj$workGrid
  fit <- reshape2::melt(fit, varnames = c("Replicate_ID", "time")) %>%
    mutate(Replicate_ID = as.character(Replicate_ID))
  if(!is.null(clust_df)){
    fit <- suppressMessages(fit  %>% left_join(clust_df))
  }
  return(fit)
}


fit_dist_to_baseline <- function(
  dist_to_baseline, time_column="RelDay", 
  value_column = "dist_to_baseline", replicate_column = "Subject") 
{
  nGrid <- length(
    seq(min(dist_to_baseline[[time_column]]),
        max(dist_to_baseline[[time_column]])))
  fpca_res <- fit_fdapca(
    dist_to_baseline, time_column, value_column, replicate_column,
    cluster = FALSE, filter = FALSE, fpca_optns = list(nRegGrid = nGrid))
  
  fitted_mean <- data.frame(time = fpca_res$workGrid, value = fpca_res$mu)
  fitted_response <- fitted_values_fpca(fpca_res) %>%
    mutate(Subject = Replicate_ID) %>%
    left_join(
      fitted_values_fpca(fpca_res, derOptns = list(p = 1)) %>%
        mutate(Subject = Replicate_ID) %>%
        rename("deriv" = value)
    )
  return(list(res = fpca_res, mean = fitted_mean, fitted = fitted_response))
}
```


```{r}
# this is because some subjects have multiple samples take the same day
bray_to_baseline_fltr <- bray_to_baseline %>% 
  group_by(perturbation, Subject, Group, Interval, RelDay) %>%
  summarise(n = n(), dist_to_baseline = mean(dist_to_baseline)) %>%
  ungroup()

bray_to_baseline_fltr %>% filter(n > 1)

```

## Antibiotics


```{r bray-abx}
bray_to_baseline_fltr %>% 
  filter(perturbation == "Abx") %>%
  filter(RelDay >= -50, RelDay <= 60) %>%
  mutate(Interval = factor(Interval, level = names(abx_intv_cols))) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = abx_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from initial antibiotic dose") +
  ylab("Bray-Curtis distance to 7 pre-antibiotic samples") 
```


```{r, eval = FALSE}
fpca.bray.abx <- fit_dist_to_baseline(
  bray_to_baseline_fltr %>% filter(perturbation == "Abx", RelDay >= -50, RelDay <= 60))

save(list = c("fpca.bray.abx"), file = "output/fpca_res.rda")
```

```{r fpca-bray-abx, fig.width=12, fig.height=6.5}
(pAbx <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Abx", RelDay >= -50, RelDay <= 60) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.abx[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7, color = "grey30") +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_point(aes(color = Interval), size = 1.5, alpha = 0.7) +
  geom_line(
    data = fpca.bray.abx[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = abx_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))
```


```{r fpca-deriv-bray-abx, fig.width=12, fig.height=6.5}
fpca.bray.abx[["fitted"]] <- fpca.bray.abx[["fitted"]] %>%
  mutate(
    Interval = ifelse(fpca.bray.abx[["fitted"]]$time < 0 , "PreAbx",
               ifelse(fpca.bray.abx[["fitted"]]$time >= 0 & fpca.bray.abx[["fitted"]]$time <= 4, "MidAbx",
                      "PostAbx")))

(pAbxDeriv <-  fpca.bray.abx[["fitted"]] %>%
  ggplot(aes(x = RelDay)) +
  geom_line(
    aes(group = Subject, x = time, y = deriv, color = Interval),
    alpha = 0.5, size = 0.7) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  scale_color_manual(values = abx_intv_cols, name = "Interval") +
  scale_x_continuous(name = "",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  ylab("Derivative"))
```


```{r fpca-bray-abx-labs, fig.width=12, fig.height=6.5}
(pAbxLab <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Abx", RelDay >= -50, RelDay <= 60) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.abx[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7 ) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_text(
      aes(label = Subject, color = Interval), size = 4, alpha = 0.7) +
  geom_line(
    data = fpca.bray.abx[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = abx_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

  # geom_point(
  #     data = abx_bray %>% filter(RelDay > StabTime),
  #     color = "orange", size = 2.5) +
```

```{r fpca-bray-abx-val-deriv, fig.width=12, fig.height=6.5}
vp = grid::viewport(width = 0.3, height = 0.32, x = 0.07, y =0.95, just = c("left", "top"))
print(pAbx)
print(pAbxDeriv + theme_bw(base_size = 10) + theme_subplot, vp = vp)
```



## Diet



```{r}
bray_to_baseline_fltr %>% 
  filter(perturbation == "Diet", RelDay >= -30, RelDay <= 30) %>%
  mutate(Interval = factor(Interval, level = names(diet_intv_cols))) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = diet_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") 
```


```{r, eval = FALSE}
fpca.bray.diet30 <- fit_dist_to_baseline(
  bray_to_baseline_fltr %>% filter(perturbation == "Diet", RelDay >= -30, RelDay <= 30))

save(list = c("fpca.bray.abx", "fpca.bray.diet", "fpca.bray.diet30"), file = "output/fpca_res.rda")
```

```{r fpca-bray-diet, fig.width=12, fig.height=6.5}
(pDiet <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Diet", RelDay >= -30, RelDay <= 30) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.diet30[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7, color = "grey30") +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_point(aes(color = Interval), size = 1.5, alpha = 0.7) +
  geom_line(
    data = fpca.bray.diet30[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = diet_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(NA, NA), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20))) 
```


```{r fpca-deriv-bray-diet, fig.width=12, fig.height=6.5}
fpca.bray.diet30[["fitted"]] <- fpca.bray.diet30[["fitted"]] %>%
  mutate(
    Interval = ifelse(fpca.bray.diet30[["fitted"]]$time < 0 , "PreDiet",
               ifelse(fpca.bray.diet30[["fitted"]]$time >= 0 & fpca.bray.diet30[["fitted"]]$time <= 4, "MidDiet",
                      "PostDiet")))

(pDietDeriv <-  fpca.bray.diet30[["fitted"]] %>%
  ggplot(aes(x = RelDay)) +
  geom_line(
    aes(group = Subject, x = time, y = deriv, color = Interval),
    alpha = 0.5, size = 0.7) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  scale_color_manual(values = diet_intv_cols, name = "Interval") +
  scale_x_continuous(name = "",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  ylab("Derivative"))
```


```{r fpca-bray-diet-labs, fig.width=12, fig.height=6.5}
(pDietLab <- bray_to_baseline_fltr %>% 
  filter(perturbation == "Diet", RelDay >= -50, RelDay <= 60) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.diet[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7 ) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_text(
      aes(label = Subject, color = Interval), size = 4, alpha = 0.7) +
  geom_line(
    data = fpca.bray.diet[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = diet_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

```


```{r fpca-bray-diet-val-deriv, fig.width=12, fig.height=6.5}
vp = grid::viewport(width = 0.3, height = 0.32, x = 0.07, y =0.95, just = c("left", "top"))
print(pDiet)
print(pDietDeriv + theme_bw(base_size = 10) + theme_subplot, vp = vp)
```

## Colon cleanout



```{r bray-cc}
bray_to_baseline_fltr %>% 
  filter(perturbation == "CC", RelDay >= -50, RelDay <= 50) %>%
  mutate(Interval = factor(Interval, level = names(cc_intv_cols))) %>%
  ggplot(
    aes(x = RelDay, y = dist_to_baseline, 
        group = Subject, color = Interval)) +
  geom_line(aes(group = Subject), alpha = 0.7, lwd = 0.5) + 
  geom_point(alpha = 0.5, size = 1.2) + 
  scale_color_manual(values = cc_intv_cols) + 
  theme(legend.position = "bottom") + 
  guides(colour = guide_legend(override.aes = list(size=3))) +
  xlab("Days from diet initiation") +
  ylab("Bray-Curtis distance to 7 pre-diet samples") 
```


```{r}
fpca.bray.cc30 <- fit_dist_to_baseline(
  bray_to_baseline_fltr %>% 
    filter(perturbation == "CC", RelDay >= -30, RelDay <= 30) %>%
    arrange(Subject, RelDay))

save(list = c("fpca.bray.abx", "fpca.bray.diet","fpca.bray.diet30", 
              "fpca.bray.cc", "fpca.bray.cc30"), 
     file = "output/fpca_res.rda")
```

```{r fpca-bray-cc, fig.width=12, fig.height=6.5}
(pCC <- bray_to_baseline_fltr %>% 
  filter(perturbation == "CC", RelDay >= -30, RelDay <= 30) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.cc30[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7, color = "grey30") +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_point(aes(color = Interval), size = 1.5, alpha = 0.7) +
  geom_line(
    data = fpca.bray.cc30[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = cc_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(NA, NA), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20))) 
```


```{r fpca-deriv-bray-cc, fig.width=12, fig.height=6.5}
fpca.bray.cc30[["fitted"]] <- fpca.bray.cc30[["fitted"]] %>%
  mutate(Interval = ifelse(fpca.bray.cc30[["fitted"]]$time < 0 , "PreCC","PostCC"))

(pCCDeriv <-  fpca.bray.cc30[["fitted"]] %>%
  ggplot(aes(x = RelDay)) +
  geom_line(
    aes(group = Subject, x = time, y = deriv, color = Interval),
    alpha = 0.5, size = 0.7) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  scale_color_manual(values = cc_intv_cols, name = "Interval") +
  scale_x_continuous(name = "",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  ylab("Derivative"))
```


```{r fpca-bray-cc-labs, fig.width=12, fig.height=6.5}
(pCCLab <- bray_to_baseline_fltr %>% 
  filter(perturbation == "CC", RelDay >= -50, RelDay <= 50) %>%
ggplot(aes(x = RelDay, y = dist_to_baseline)) +
  geom_line(
    data = fpca.bray.cc[["fitted"]],
    aes(group = Subject, x = time, y = value),
    alpha = 0.3, size = 0.7 ) +
  geom_vline(xintercept = 0, lwd = 1, color = "orange") +
  geom_vline(xintercept = 4, lwd = 1, color = "orange") +
  geom_text(
      aes(label = Subject, color = Interval), size = 4, alpha = 0.7) +
  geom_line(
    data = fpca.bray.cc[["mean"]], aes(x = time, y = value),
    color = "navy", size = 2) +
  scale_color_manual(values = cc_intv_cols, name = "Interval") +
  scale_x_continuous(
    name = "Days from initial antibiotic dose",
    limits = c(NA, NA), breaks = seq(-50, 60, 10)) +
  scale_y_continuous(
    name = "Bray-Curtis distance to baseline", 
    limits = c(0.1, 0.85), breaks = seq(0.1, 0.80, 0.1)) +
  theme(text = element_text(size = 20)))

```


```{r fpca-bray-cc-val-deriv, fig.width=12, fig.height=6.5}
vp = grid::viewport(width = 0.3, height = 0.32, x = 0.07, y =0.95, just = c("left", "top"))
print(pCC)
print(pCCDeriv + theme_bw(base_size = 10) + theme_subplot, vp = vp)
```

```{r}
sessionInfo()
```

